Challenge I
- Import bike-share excel file, then put the last_updated column to a readable datetime value.
- Import excel file
library(readxl)
BShare_xls <- read_excel('bike-share.xlsx')
#str(BShare_xls) # alternative to function head
- Readable Last_updated column
BShare_xls$last_updated<-as.POSIXct(BShare_xls$last_updated,origin = '1970-01-01')
head(BShare_xls,3)
- Using the iot excel dataset, modify the device column by removing the colon that separate each term (for example 1c:bf:ce:15:ec:4d becomes 1cbfce15ec4d).
- Import excel file
iot1<-read_excel('iot.xlsx')
head(iot1,3)
- Modify device column
library(stringr)
library(purrr)
library(rex)
##
## Attaching package: 'rex'
## The following object is masked from 'package:stringr':
##
## regex
iot1<-modify_at(iot1,'device',~str_split(.,':')) # applies the split function to the column device
new<-list()
for (i in 1:nrow(iot1)){
# paste combines the strings into one phrase
val<-paste(iot1$device[[i]][1],iot1$device[[i]][2],iot1$device[[i]][3],iot1$device[[i]][4],
iot1$device[[i]][5],iot1$device[[i]][6])
# gsub removes space between terms
val<-gsub("\\s+", "", val, perl=TRUE)
new<-append(new,val)
}
iot1$device<- unlist(new)
head(iot1,3)
Challenge II
- Remove the columns last_updated, id and ttl from the bike share excel dataset.
BShare_xls<-BShare_xls[,4:ncol(BShare_xls)]
# or
#BShare_xls<-BShare_xls[,names(BShare_xls)!=c('last_updated','ttl','id')]
head(BShare_xls,3)
- Merge the bike-share.xlsx and iot.xlsx depending on time
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:rex':
##
## matches
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
BShare_xls$ts<-BShare_xls$rec_update
BS_iot<-inner_join(BShare_xls,iot1,by='ts')
# another method
BS_iot<-merge(BShare_xls,iot1,by='ts')
#head(BS_iot,3)
- Create a new dataset which is the subset of the one in 2. where the co value is greater than the mean value of the iot.xlsx dataset.
BS_iot_m<-BS_iot[which(BS_iot$co > mean(iot1$co)),]
# or
#BS_iot_m<-subset(BS_iot, co> mean(iot1$co))
#dim(BS_iot_m)
Challenge III:
- Using iot_telemetry dataset, visualize the distribution of smoke of each device.
- read the csv data
iot_tel<-read.csv('iot_telemetry_data.csv')
head(iot_tel,3)
- Visualize the distribution
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
fig <- plot_ly(iot_tel,x = ~smoke,type = "histogram",color=~device, text='co', hovertext=~co)%>%layout(title='Distribution of smoke emmitted by devices')
fig
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.